*** Candidate project folders, one per co-author machine (dirBZ and dirEF are left empty)
local dirAF ""~/Dropbox/Covid Info Survey/replication_package/""
local dirBZ ""
local dirEF ""
local dirVP ""C:/Users/pezone/AppData/Local/Dropbox/Dropbox/Covid Info Survey/replication_package/""
local dirVPmac ""/Users/vincenzopezone/Dropbox/Covid Info Survey/replication_package/""

*** Try the candidate folders one after the other. Errors are captured, so folders that
*** do not exist on this machine are skipped and the last successful cd is the one in effect.
foreach d in dirVPmac dirAF dirVP {
	capture cd ``d''
}

*** w is the winsorization level, written without the decimal point (025 is used as .025 further down)
local w 025


/*** Wave 2: load the raw survey export ***/

import excel using "survey_2.xlsx", sheet("Sheet0") firstrow clear

* Keep only responses whose gc code is "1" or "4"
keep if inlist(gc, "1", "4")


/*** Date and time-of-day of the survey start ***/

generate date   = dofc(StartDate)
generate hour   = hh(StartDate)
generate minute = mm(StartDate)
generate second = ss(StartDate)

*** Pilot observations have an empty Q163_1; remove them

drop if Q163_1 == ""


/*** Screeners ***/

label var Q169 "region"
label var Q25  "age"
label var Q26  "gender"
label var Q185 "interested in sports"
label var Q186 "interested in music"

/*** Priors ***/

label var Q0     "infected by covid"
label var Q1_1   "worried health"
label var Q2_1   "worried economy"
label var Q3_4   "probability infected (prior)"

* Deaths: nowcast, confidence, forecast and tail probability
label var Q4     "nowcast deaths"
label var Q5_1   "confident nowcast deaths"
label var Q6     "forecast deaths (priors)"
label var Q7_1   "probability deaths>200,000 (prior)"

* Unemployment: nowcast, confidence, forecast and tail probability
label var Q8     "nowcast UE"
label var Q9_1   "confident nowcast UE"
label var Q10    "forecast UE (priors)"
label var Q11_1  "probability UE>20 (prior)"

label var Q12_1  "measures evaluation"
label var Q149_4 "sign for contact tracing app"

* Headline choice and display order, one pair of variables per treatment arm
forvalues t = 1/3 {
	label var Q15_`t'    "pick headlines - T`t'"
	label var Q15_`t'_DO "order headlines - T`t'"
}

label var Q16    "headline just read"
label var Q16_DO "order headline just read"

label var Q17_1  "article informativeness"
label var Q18_1  "article reliability"

* Advertising questions, by outlet and treatment arm
label var Q21_choice1 "advertise NYT - T1"
label var Q256_1      "advertise Fox - T1"
label var Q257_1      "advertise NYT - T2"
label var Q258_1      "advertise Fox - T2"
label var Q259_1      "advertise NYT - T3"
label var Q260_1      "advertise Fox - T3"

*** Time spent on each article, by outlet

foreach outlet in NYT FOX {

	* Start from page timer 1, then overwrite with timers 2 and 3 wherever they are recorded
	generate time_spent_`outlet' = time_spent_`outlet'_1_PageSubmit

	forvalues k = 2/3 {
		replace time_spent_`outlet' = time_spent_`outlet'_`k'_PageSubmit if time_spent_`outlet'_`k'_PageSubmit != .
	}
}

* Divide the timers by 60 (presumably seconds to minutes; confirm the timer unit)
replace time_spent_FOX = time_spent_FOX/60
replace time_spent_NYT = time_spent_NYT/60

* Remove respondents whose answer to Q8 or Q10 is above 100.
* Stata ranks missing above every number, so respondents with a missing answer are removed too.
foreach q of numlist 8 10 {
	drop if Q`q' > 100
}


*** Change in behavior
/*
Question pairs (prior / posterior):

apps
Q149 prior
Q132 posterior

social distancing
Q152 prior
Q154 posterior
*/

* Posterior minus prior for the app question
generate d_app = Q132 - Q149

* Prior answer, plus indicators for answers at the two ends of the 0-100 range
generate app_pre     = Q149
generate app_pre_100 = (Q149 == 100)
generate app_pre_0   = (Q149 == 0)



*** Sample restrictions based on the time spent on the first questions

* Running total of the page-submit timers of pages 1 to 9
generate time_before = 0

forvalues page = 1/9 {
	replace time_before = time_before + time`page'_PageSubmit
}

*** The timer of question Q12 was labeled Q153; it is added to the total as well

replace time_before = time_before + Q153_PageSubmit

generate time_before_minutes = time_before/60

*** For each cutoff we build two samples: one that excludes only the observations at the bottom,
*** and one that also excludes the observations at the top, using the symmetric percentile.

foreach cut in 5 7.5 10 {

	local top = 100 - `cut'

	* Suffixes for the variable names: the decimal point becomes an underscore (7.5 gives 7_5)
	local lo_tag = subinstr("`cut'", ".", "_", .)
	local hi_tag = subinstr("`top'", ".", "_", .)

	* Percentile cutoffs, held in temporary helper variables
	egen cutoff_lo = pctile(time_before_minutes), p(`cut')
	egen cutoff_hi = pctile(time_before_minutes), p(`top')

	generate sample_`lo_tag' = (time_before_minutes > cutoff_lo)

	generate sample_`lo_tag'_`hi_tag' = (time_before_minutes > cutoff_lo & time_before_minutes < cutoff_hi)

	drop cutoff_lo cutoff_hi

	* S accumulates the suffixes of all sample indicators; the PCA loops iterate over it
	local S `S' `lo_tag' `lo_tag'_`hi_tag'

}

*** Note: sample_X excludes the observations at or below the Xth percentile;
*** sample_X_Y excludes the observations at or below the Xth percentile and at or above the Yth percentile



*************************************************************
******* Generate variables used in regressions **************
*************************************************************

*** Main variables of interest

* Infection status: full categorical coding plus a yes/no indicator
encode Q0, gen(infected)
label var infected "Already Infected (Cntd)"

generate already_infected = inlist(Q0, "Yes, definitely - confirmed by test", "Very likely - did not get tested, but had symptoms")
label var already_infected "Already Infected"

* Worries and perceived infection risk
generate worried_health = Q1_1
label var worried_health "Worried - Health"

generate worried_economy = Q2_1
label var worried_economy "Worried - Economy"

generate probability_infected = Q3_4
label var probability_infected "Prob. Infected by Year End"

* Deaths nowcast, in thousands
generate nowcast_deaths = Q4/1000
label var nowcast_deaths "Nowcast Deaths (in 1,000s)"

*** The deaths nowcast takes implausible values; for now it is winsorized at the .01 level
winsor nowcast_death, p(.01) gen(nowcast_deaths2)
label var nowcast_deaths2 "Nowcast Deaths"

generate lnowcast_deaths = log(Q4)
label var lnowcast_deaths "Log(Nowcast Deaths)"

* Confidence answers: read the number directly when possible, then set the
* value for answers whose text contains 1, 5 or 3 (applied in that order)
generate confident_nowcast_deaths = real(Q5_1)
foreach k in 1 5 3 {
	replace confident_nowcast_deaths = `k' if strpos(Q5_1, "`k'") > 0
}
label var confident_nowcast_deaths "Confident in Nowcast Deaths"

generate prob_deaths_200 = Q7_1
label var prob_deaths_200 "Pr. Deaths\textgreater 200K"

* Unemployment nowcast and confidence, coded like the deaths questions
generate nowcast_ue = Q8
label var nowcast_ue "Nowcast UE"

generate confident_nowcast_ue = real(Q9_1)
foreach k in 1 5 3 {
	replace confident_nowcast_ue = `k' if strpos(Q9_1, "`k'") > 0
}
label var confident_nowcast_ue "Confident in Nowcast UE"

generate forecast_ue_prior = Q10
label var forecast_ue_prior "Forecast UE (Prior)"

generate prob_ue_20 = Q11
label var prob_ue_20 "Pr. UE\textgreater 20"

* Chosen headline: the three treatment-specific answers are concatenated into one string;
* pessimistic is 100 when that string contains "two years" and 0 otherwise
generate article_chosen = Q15_1 + Q15_2 + Q15_3

generate pessimistic = (strpos(article_chosen, "two years") > 0)*100

**************** Demographics ************************

generate female = (Q26 == "Female")
label var female "Female"

encode Q29, gen(degree)
label var degree "School Degree"

encode Q30, gen(employment)
label var employment "Employment Situation"

generate employment_loss_hh = (Q31 == "Yes")
label var employment_loss_hh "Empl. Loss in HH"

encode Q34, gen(health)
label var health "Health"

* Health conditions are recovered from the length of the Q35 answer string:
* empty gives 0, lengths 28/52/97 give 1, length 179 gives 3, any other length gives 2
generate Q35x = length(Q35)

generate health_conditions = cond(Q35x == 0, 0, cond(inlist(Q35x, 28, 52, 97), 1, cond(Q35x == 179, 3, 2)))
label var health_conditions "Health Cnds. (0-3)" // AF changed label

generate health_insurance = (Q36 == "Yes")
label var health_insurance "Health Insurance"

encode Q37, gen(political_views)
label var political_views "Political Views"

encode Q38, gen(vote_for)
label var vote_for "Planning to vote for..."

generate married = (Q40 == "Yes")
label var married "Married"

* Zip code as a number
generate zip = real(Q41)

generate own_house = (Q42 == "Yes")
label var own_house "Own Primary Residence"

*** # people in HH (ignore age structure for now)
* n_hh is missing whenever one of the four Q43 components cannot be converted to a number
gen n_hh=real(Q43_1)+real(Q43_2)+real(Q43_3)+real(Q43_4)

* Top-code at 7. Stata ranks missing above every number, so the cap is restricted to
* non-missing values; without that restriction unknown household sizes would be recoded to 7.
replace n_hh = 7 if n_hh>7 & !missing(n_hh)


label variable n_hh "People in HH"

* Household members above 65, top-coded at 2 (missing values are again left missing)
gen n_old=real(Q43_4)
replace n_old=2 if n_old>2 & !missing(n_old)
label variable n_old "Nr Ppl. above 65 in HH" // AF changed label

encode Q44, gen(actions_taken)
label var actions_taken "Actions Taken So Far"

encode Q45, gen(stock)
label var stock "Hold Stocks"

* Preference scales: read the number directly when possible, then set the
* value for answers whose text contains 1 or 7 (applied in that order)
local scale_vars  risk_loving discount trust
local scale_items Q46_1 Q47_1 Q181

forvalues i = 1/3 {

	local v : word `i' of `scale_vars'
	local q : word `i' of `scale_items'

	generate `v' = real(`q')
	replace `v' = 1 if strpos(`q', "1") > 0
	replace `v' = 7 if strpos(`q', "7") > 0
}

label var risk_loving "Willingness to Take Risks"
label var discount "Willingness to Wait"
label var trust "Trust in People"

encode Q212, gen(income)
label var income "HH Income"

encode Q169, gen(region)
label var region "Region"

* Age, capped at the 99th percentile and divided by 100
gen age_c=Q25

*** Truncate outliers at the top 99%

quietly sum age_c,d

* Stata ranks missing above every number, so the cap is restricted to non-missing ages;
* without that restriction a missing age would be replaced by the 99th percentile.
replace age_c=`r(p99)' if age_c>`r(p99)' & !missing(age_c)
replace age_c=age_c/100

label variable age_c "Age (\(\div 100\))"

* Capped age back on its original scale
gen age_100=age_c*100
label variable age_100 "Age (18-81)"

* Treatment arm (1-3): set for each digit found in FL_215_DO; if several digits appear, the highest wins
generate treatment = .
forvalues arm = 1/3 {
	replace treatment = `arm' if strpos(FL_215_DO, "`arm'") > 0
}


* Sum and difference of the two worry measures
generate worried = worried_economy + worried_health
label var worried "Worried H+E"

generate worried_h_e = worried_health - worried_economy
label var worried_h_e "Worried H-E"


* Indicator for treatment arm 3
generate revealed = (treatment == 3)
label var revealed "Revealed Before"

generate biden = (Q38 == "Joe Biden")
label var biden "Biden Voter"

*** biden2 is a copy of biden; it turns out useful for the loop in the regression tests
generate biden2 = (Q38 == "Joe Biden")
label var biden2 "Biden Voter"


generate trump = (Q38 == "Donald Trump")
label var trump "Trump Voter"


* Voter indicators interacted with the arm-3 indicator
foreach cand in biden biden2 trump {
	generate `cand'_revealed = `cand'*revealed
}

label var biden_revealed "Biden Voter \(\times\) Revealed Before"
label var biden2_revealed "Biden Voter \(\times\) Revealed Before"
label var trump_revealed "Trump Voter \(\times\) Revealed Before"


*** Headlines order

* order is 1 when the concatenated display-order string starts with Sc or with a double quote followed by S
generate headline_order_raw = Q15_1_DO + Q15_2_DO + Q15_3_DO
generate order = inlist(substr(headline_order_raw, 1, 2), "Sc", `""S"')
drop headline_order_raw





*** For predictive regressions, reduce a bit the number of regressors

*** Education

generate bachelor = (Q29 == "Bachelor's Degree (For example: BA, BS)")

* Everything below a bachelor degree, including the matching free-text answers in Q29_8_TEXT
generate high_school = inlist(Q29, ///
		"High school diploma (or equivalent)", ///
		"Associate/Junior College Degree (including academic, vocational, or occupational programs)", ///
		"Some college but no degree (including academic, vocational, or occupational programs", ///
		"Less than high school") ///
	| inlist(Q29_8_TEXT, ///
		"In school for a bachelors degree now", ///
		"Professional License", ///
		"Technical school ", ///
		"government run education is indoctrination not education", ///
		"in college now", ///
		"I prefer not to answer")

* Graduate degrees, including the matching free-text answers
generate more_than_college = inlist(Q29, "Doctoral Degree", "Master's Degree (For example: MA, MBA, MS, MSW)") ///
	| inlist(Q29_8_TEXT, "MD", "Juris Doctor")

label var high_school "At most high school"
label var bachelor "Bachelor"
label var more_than_college "More than bachelor"

*** Simplified Education

generate college = more_than_college + bachelor
label var college "College"


*** Employment Conditions
*** Each indicator combines the closed-form answer in Q30 with hand-coded free-text answers in Q30_10_TEXT

generate retired = (Q30 == "Retiree or early retiree" | Q30_10_TEXT == "retired")

generate part_time = (Q30 == "Working part-time (for someone or self-employed)")

generate full_time = (Q30 == "Working full-time (for someone or self-employed)") ///
	| inlist(Q30_10_TEXT, ///
		"General Labor", ///
		"self employed", ///
		"Self employed / Retail and rental", ///
		"freelance/contractor")

generate student = (Q30 == "Student, at school, or in training")

generate homemaker = (Q30 == "Homemaker")

generate not_work = inlist(Q30, "Not working, but would like to work", "Temporarily laid off") ///
	| inlist(Q30_10_TEXT, ///
		"Disabled", ///
		"Furloughed", ///
		"I own/operate a seasonal business whose 2020 season has been cancelled", ///
		"Prefer not to answer ", ///
		"furloughed", ///
		"none")

generate not_work_not_looking = inlist(Q30, "On sick or other leave", "Permanently disabled or unable to work") ///
	| inlist(Q30_10_TEXT, ///
		"Unemployed and not looking for work", ///
		"disabled", ///
		"caregiver", ///
		"Not working but caring for parents full-time")

label var retired "Retired"
label var part_time "Part Time"
label var full_time "Full Time"
label var not_work "Not Working"
label var not_work_not_looking "Not Working Nor Looking"
label var student "Student"


*** Simplified employment conditions

generate working = homemaker + part_time + full_time
label var working "Working"

generate not_working = not_work + not_work_not_looking + student
label var not_working "Not Working"

*** Political Affiliations

* Liberal score from 1 (very conservative) to 7 (very liberal), with Libertarian coded as 4.
* Respondents whose political_views matches none of the seven categories keep a missing score.
gen libs=7 if political_views=="Very Liberal/Democrat":political_views
replace libs=6 if political_views=="Liberal/Democrat":political_views
replace libs=5 if political_views=="Leaning Liberal/Democrat":political_views
replace libs=4 if political_views=="Libertarian":political_views
replace libs=3 if political_views=="Leaning Conservative/Republican":political_views
replace libs=2 if political_views=="Conservative/Republican":political_views
replace libs=1 if political_views=="Very Conservative/Republican":political_views


label variable libs "Liberal Score (1-7)"

* Liberal indicator: score of 5 or more. Stata ranks missing above every number, so the
* comparison is restricted to non-missing scores; without that restriction every respondent
* with a missing score would be coded as liberal.
gen lib=(libs>=5 & !missing(libs))

label variable lib "Liberal"

*** Voting Plans

generate biden_voter       = (Q38 == "Joe Biden")
generate trump_voter       = (Q38 == "Donald Trump")
generate non_voter         = (Q38 == "I will not vote for anyone")
generate third_party_voter = (Q38 == "Third party candidate")

label var biden_voter "Vote Biden"
label var trump_voter "Vote Trump"
label var non_voter "Non Voter"
label var third_party_voter "Third Party Voter"



*** Simplified Political Affiliations

* Liberal groups the three liberal/democrat categories
generate liberal = ( ///
	political_views == "Very Liberal/Democrat":political_views | ///
	political_views == "Liberal/Democrat":political_views | ///
	political_views == "Leaning Liberal/Democrat":political_views)

* Conservative groups the two non-leaning conservative/republican categories
generate conservative = ( ///
	political_views == "Conservative/Republican":political_views | ///
	political_views == "Very Conservative/Republican":political_views)

generate libertarian = (political_views == "Libertarian":political_views)

label var libertarian "Libertarian"
label var conservative "Conservative"
label var liberal "Liberal"

*** Race: From Andreas' code

// AF: indicators for Black, Asian, other minority, and then also Hispanic.
// Each race indicator is 1 when the Q33 answer contains the given text.
generate race_black = (strpos(Q33, "Black") > 0)
generate race_asian = (strpos(Q33, "Asian") > 0)
generate race_other_min = (strpos(Q33, "Indian") > 0) | (strpos(Q33, "Hawa") > 0)

* br Q33* if regexm(Q33,"specify") // those that selected "other" -- often hispanic

generate hispanic = (Q32 == "Yes")

label var race_black "Black"
label var race_asian "Asian"
label var race_other_min "Race - Other"
label var hispanic "Hispanic"




*** Lockdown Severity
*** Scale from 4 (order currently in place) down to 1 (never an order); 0 is "Don't know".
*** Respondents who filled in the free-text field Q44_6_TEXT are assigned 3.
*** The assignments run in the order below, so a later matching line overrides an earlier one.

generate severity_lockdown = .

replace severity_lockdown = 4 if actions_taken == "There is currently a stay-at-home order":actions_taken
replace severity_lockdown = 3 if actions_taken == "There was a stay-at-home order which is being gradually lifted (in phases)":actions_taken
replace severity_lockdown = 3 if Q44_6_TEXT != ""
replace severity_lockdown = 2 if actions_taken == "There was a stay-at-home order which has been lifted entirely":actions_taken
replace severity_lockdown = 1 if actions_taken == "There was never a stay-at-home order":actions_taken
replace severity_lockdown = 0 if actions_taken == "Don't know":actions_taken

generate lockdown_not_know = (actions_taken == "Don't know":actions_taken)

label var severity_lockdown "Lockdown Severity"
label var lockdown_not_know "Lockdown Severity - Not Know"


* Copy of the contact-tracing app sign-up answer
generate sign_up = Q149_4

*** Generate continuous income variable
*** Income brackets are mapped to an ordered scale from 0 (lowest bracket) to 12 (highest bracket);
*** answers that match none of the brackets stay missing

generate income_scale = 0 if Q212_1 == "Less than $10,000"

replace income_scale = 1  if Q212_1 == "$10,000 - $19,999"
replace income_scale = 2  if Q212_1 == "$20,000 - $29,999"
replace income_scale = 3  if Q212_1 == "$30,000 - $39,999"
replace income_scale = 4  if Q212_1 == "$40,000 - $49,999"
replace income_scale = 5  if Q212_1 == "$50,000 - $59,999"
replace income_scale = 6  if Q212_1 == "$60,000 - $69,999"
replace income_scale = 7  if Q212_1 == "$70,000 - $79,999"
replace income_scale = 8  if Q212_1 == "$80,000 - $89,999"
replace income_scale = 9  if Q212_1 == "$90,000 - $99,999"
replace income_scale = 10 if Q212_1 == "$100,000 - $149,999"
replace income_scale = 11 if Q212_1 == "$150,000 - $199,999"
replace income_scale = 12 if Q212_1 == "$200,000 or more"

label var income_scale "Income"

* Shift the encoded stock variable down by one so that its first category is 0
replace stock = stock - 1


*** Evaluation of Article

// Informativeness and reliability: strip the characters of the verbal anchors, keep the number
destring Q17_1, ignore("Not informative at all Very") replace
destring Q18_1, ignore("Not reliable at all Completely") replace

generate reliability = Q18_1

generate informativeness = Q17_1

* Money spent starts from the first advertising question
generate money_spent = Q21_choice1_1

* Reading time: the FOX timer, overwritten by the NYT timer wherever the latter is recorded
generate time_spent_article_minutes = time_spent_FOX
replace time_spent_article_minutes = time_spent_NYT if time_spent_NYT != .

* Overwrite money spent with each of the remaining advertising questions that has an answer
forvalues item = 256/260 {
	replace money_spent = Q`item'_1 if Q`item'_1 != .
}

* Winsorized reading time, at the level set by w at the top of the file
winsor time_spent_article_minutes, p(.`w') gen(time_article2)



* pessimistic is coded 0/100; rescale it to 0/1
generate pess_article = pessimistic/100
label var pess_article "Pessimistic"

generate revealed_after = (FL_215_DO == "treatment2")
label var revealed_after "Rev. After"




*** Source guessing

* Unanswered guesses count as zero
forvalues arm = 1/2 {
	forvalues src = 1/9 {
		replace guess_source_`arm'_`src' = 0 if guess_source_`arm'_`src' == .
	}
}

* Each aggregate is filled from the guess_source_1_* items in treatment 1 and from the
* guess_source_2_* items in treatment 2; it stays missing in any other treatment.

* Sources 4, 5, 8 and 2
generate p_liberal = .
forvalues arm = 1/2 {
	replace p_liberal = guess_source_`arm'_4 + guess_source_`arm'_5 + guess_source_`arm'_8 + guess_source_`arm'_2 if treatment == `arm'
}
label var p_liberal "Pr. Liberal"

* Sources 1, 3 and 7
generate p_conservative = .
forvalues arm = 1/2 {
	replace p_conservative = (guess_source_`arm'_1 + guess_source_`arm'_3 + guess_source_`arm'_7) if treatment == `arm'
}
label var p_conservative "Pr. Conservative"

* Same without source 7 (presumably the WSJ, going by the variable name; confirm)
generate p_conservative_no_wsj = .
forvalues arm = 1/2 {
	replace p_conservative_no_wsj = (guess_source_`arm'_1 + guess_source_`arm'_3) if treatment == `arm'
}
label var p_conservative_no_wsj "Pr. Conservative"


* Liberal minus conservative, with and without source 7
generate p_lib_cons = p_liberal - p_conservative
label var p_lib_cons "Pr. Lib. - Pr. Cons."

generate p_lib_cons_no_wsj = p_liberal - p_conservative_no_wsj
label var p_lib_cons_no_wsj "Pr. Lib. - Pr. Cons."



*** Interaction Terms

generate pess_rev = revealed_after*pess_article
label var pess_rev "Pess. \(\times\) Rev."


* For every political measure, build its interactions with the arm-3 indicator, with the
* pessimistic-article indicator, with revealed_after, and the triple interaction
foreach v in biden biden2 libs lib trump p_lib_cons {

	generate `v'_rev_bef = `v'*revealed

	generate pess_`v' = `v'*pess_article
	label var pess_`v' "Pess. \(\times\) `v'"

	generate `v'_rev = revealed_after*`v'
	label var `v'_rev "`v' \(\times\) Rev."

	*** Key Term
	generate `v'_rev_pess = revealed_after*`v'*pess_article
	label var `v'_rev_pess "`v' \(\times\) Rev. \(\times\) Pess."

}


* Nicer labels for the interactions of the liberal indicator
label var lib_rev_bef "Liberal \(\times\) Rev. Before"
label var pess_lib "Pess. \(\times\) Liberal"
label var lib_rev "Liberal \(\times\) Rev."
label var lib_rev_pess "Liberal \(\times\) Rev. \(\times\) Pess."

 
*** Build variables for revision regressions
*** Revisions (probabilistic): posterior answer minus prior answer

* Deaths probability: revision and indicators for priors at the two ends of the 0-100 range
generate p_death_rev = Q23_1 - Q7_1
generate Q7_1_100 = (Q7_1 == 100)
generate Q7_1_0 = (Q7_1 == 0)

generate p_deaths_rev = Q23_1 - Q7_1


* Unemployment probability: print a comparison of posterior and prior, then build the same variables
compare Q25_1 Q11_1
generate p_ur_rev = Q25_1 - Q11_1
generate Q11_1_100 = (Q11_1 == 100)
generate Q11_1_0 = (Q11_1 == 0)

generate p_ue_rev = Q25_1 - Q11_1


*** Use names easier to remember

generate p_deaths_prior = Q7_1
label var p_deaths_prior "Pr. Deaths. Prior"

generate p_deaths_posterior = Q23_1
label var p_deaths_posterior "Pr. Deaths. Post."

generate p_ue_prior = Q11_1
label var p_ue_prior "Pr. UE Prior"

generate p_ue_posterior = Q25_1
label var p_ue_posterior "Pr. UE Post."


// Lockdown support: the first character of the answer is converted to a number

generate support = substr(Q136, 1, 1)
destring support, replace
replace support = 6 - support // code such that higher = more support for future lockdown

generate support_sofar = substr(Q12_1, 1, 1)
destring support_sofar, replace

label var support_sofar "Lockdown Support"

generate d_support = support - support_sofar
label var d_support "Support Diff."

label var time_article2 "Time Spent"



*** Deaths and UE Forecasts

* Unemployment forecast (prior) and indicators for answers equal to 100 or 0
gen ue_prior=Q10
label variable ue_prior "UE Prior"

gen ue_100=(Q10==100)
label variable ue_100 "UE Prior=100"

gen ue_0=(Q10==0)
label variable ue_0 "UE Prior=0"



* Indicators for prior probabilities equal to 100 or 0
gen prob_ue_20_100=(Q11_1==100)
label variable prob_ue_20_100 "Prob. UE=100"

gen prob_ue_20_0=(Q11_1==0)
label variable prob_ue_20_0 "Prob. UE=0"

gen prob_deaths_200_100=(Q7_1==100)
label variable prob_deaths_200_100 "Prob. Deaths=100"
gen prob_deaths_200_0=(Q7_1==0)
* Label the =0 indicator itself (the label used to be attached to prob_deaths_200_100 by mistake)
label variable prob_deaths_200_0 "Prob. Deaths=0"



* Deaths forecasts in thousands: raw versions (suffix nw) and versions winsorized at the level set by w
generate deaths_posterior_nw = Q22/1000

winsor deaths_posterior_nw, p(.`w') gen(deaths_posterior)


* The prior is stored as text with thousands separators; remove the commas before converting
generate deaths_prior_nw = real(subinstr(Q6, ",", "", .))/1000

winsor deaths_prior_nw, p(.`w') gen(deaths_prior)
label var deaths_prior "Deaths Prior"

generate deaths_rev = deaths_posterior - deaths_prior



* Logs of the raw and of the winsorized forecasts
foreach stage in prior posterior {

	generate log_deaths_`stage'_nw = log(deaths_`stage'_nw)

	generate log_deaths_`stage' = log(deaths_`stage')

}

generate log_deaths_rev = log_deaths_posterior - log_deaths_prior


* Unemployment forecast: posterior and revision
generate ue_posterior = Q24

generate ue_rev = ue_posterior - ue_prior

* Infection probability: revision, prior, posterior
generate p_inf_rev = Q26_4 - Q3_4



generate p_inf_prior = Q3_4

generate p_inf_posterior = Q26_4


* Indicators for infection priors equal to 0 or 100
generate inf_0 = (p_inf_prior == 0)

generate inf_100 = (p_inf_prior == 100)


*** Dummies for the full sample and for those who got the headline check right

generate all = 1

* The check is passed when the chosen headline and the headline reported in Q16
* both contain "optimistic", or both contain "two years"
generate headline_ok = (strpos(article_chosen, "optimistic") > 0 & strpos(Q16, "optimistic") > 0) ///
	| (strpos(article_chosen, "two years") > 0 & strpos(Q16, "two years") > 0)

label var headline_ok "Headline Check"



*** Merge with county data

* The zip-to-county file is keyed on prop_zip
rename zip prop_zip

merge m:1 prop_zip using zipTOcounty, keep(master match)

* NOTE(review): generate would fail if a variable called fips already existed, so the name inside
* real() is resolved by variable abbreviation, presumably to a string fips_code; confirm.
generate fips = real(fips)

* Two zip codes receive their county code by hand
replace fips = 45015 if prop_zip == 29486

replace fips = 48121 if prop_zip == 75036

drop _merge


*** NYT has data for New York city, not its five counties, so we assign a fake FIPS code to its counties

replace fips = 100000 if inlist(fips, 36005, 36047, 36061, 36081, 36085)

preserve

	use infections_population_alldates, clear

	replace fips = 100000 if county == "New York City"
	drop if fips == .

	*** Use lags to make sure individuals are aware of data on infections/deaths
	replace date = date + 1

	generate cases_pc = cases/population

	generate deaths_pc = deaths/population

	keep fips date deaths_pc cases_pc

	save temp, replace

restore

merge m:1 fips date using temp, keep(master match)

drop _merge

*** Assume deaths and infections are equal to zero if unmatched; we can also drop these obs.

* Flag the observations without county data before the zeros are filled in
generate unmatched_county = 1 if deaths_pc == .

* Express both rates per 1,000 inhabitants and set unmatched observations to zero
foreach outcome in deaths cases {
	replace `outcome'_pc = `outcome'_pc*1000
	replace `outcome'_pc = 0 if `outcome'_pc == .
}

label var deaths_pc "Deaths P.C. (\(\times\)1000)"
label var cases_pc "Cases P.C.  (\(\times\)1000)"


* Build a county-level file with the April unemployment rate, its lags and its leads
preserve

	import excel using "bls_ur.xlsx", sheet("laucntycur14") firstrow clear

	* County identifier: state code followed by county code
	generate fips_code = state_fips + county_fips

	* Keep the first six characters of the period and split them at the dash into period1 and period2
	replace period = substr(period, 1, 6)

	split period, p("-")

	generate year = real("20" + period2)

	* Month number from the three-letter month name
	generate month = .
	local m = 0
	foreach name in Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec {
		local ++m
		replace month = `m' if period1 == "`name'"
	}

	generate ue_county = real(ur)
	label var ue_county "County UR"

	* Monthly panel by county
	generate ym = ym(year, month)

	egen id = group(fips_code)

	drop if id == .

	xtset id ym

	* Lags and leads of the unemployment rate, filled in on the April rows only
	forvalues h = 1/6 {

		generate ue_l`h' = L`h'.ue_county if month == 4

		generate ue_f`h' = F`h'.ue_county if month == 4

	}

	generate ue_l7 = L7.ue_county if month == 4


	keep if month == 4

	* Change of the April rate relative to six months earlier
	generate ue_diff = ue_county - ue_l6

	keep fips_code ue_*

	save ue_county, replace

restore

* Attach the county unemployment variables; survey rows without a match are kept
merge m:1 fips_code using ue_county,keep(1 3)


*** Use national average if missing

* ue_diff must be filled in first: its condition tests whether ue_county is missing, which
* can no longer be true once ue_county itself has been replaced by the national figure.
replace ue_diff=14.7-3.6 if ue_county==.

replace ue_county=14.7 if ue_county==.



*** Run PCA for each sample; belief_vars lists all the variables we want to include; for now, only the "bounded" ones
*** The first component is also standardized for ease of interpretation

local belief_vars p_deaths_prior p_ue_prior deaths_prior ue_prior nowcast_deaths2 nowcast_ue probability_infected

foreach smp in `S' {

	pca `belief_vars' if sample_`smp' == 1

	* Scores of the first four components
	predict pc_`smp' pc2_`smp' pc3_`smp' pc4_`smp' if sample_`smp' == 1

	* Divide the first component by its standard deviation
	quietly summarize pc_`smp'

	replace pc_`smp' = pc_`smp'/`r(sd)'

	label var pc_`smp' "PC1"

	generate pc_`smp'_revealed = pc_`smp'*revealed
	label var pc_`smp'_revealed "PC1 \(\times\) Rev. Before"

}
/*** Fraction explained by first four principal components:

0.3385

0.2465

0.1749

0.0975

***/


*** Same procedure for the "assessment" variables, keeping only the first component

local assess_vars reliability informativeness money

foreach smp in `S' {

	pca `assess_vars' if sample_`smp' == 1

	predict pr_`smp' if sample_`smp' == 1

	quietly summarize pr_`smp'

	replace pr_`smp' = pr_`smp'/`r(sd)'

	label var pr_`smp' "PC Reliability"

}


* Sample markers: every treatment, and every treatment except arm 3
generate all_t = 1
generate no3 = 1 if treatment != 3



* Interactions with the arm-3 indicator
generate support_sofar_revealed = support_sofar*revealed
label var support_sofar_revealed "Lockdown S. \(\times\) Rev. Before"

generate worried_revealed = worried*revealed
label var worried_revealed "Worried H+E \(\times\) Rev. Before"

**** Relabel main variables

label var nowcast_deaths2 "Nowcast Deaths"
label var deaths_prior "Forecast Deaths"
label var p_deaths_prior "Pr(Deaths \(>\) 200k)"
label var nowcast_ue "Nowcast UR"
label var ue_prior "Forecast UR"
label var p_ue_prior "Pr(UR \(>\) 20\%)"
label var probability_infected "Pr(COVID inf.)"
label var p_inf_posterior "Pr(COVID inf.) (Post.)"
label var p_deaths_rev "\(\Delta\)Pr(Deaths \(>\) 200k)"
label var p_ue_rev "\(\Delta\)Pr(UR \(>\) 20\%)"
label var p_inf_rev "\(\Delta\)Pr. Infected"

generate white = (Q33 == "White")
label var white "White"

generate unemployed = (Q30 == "Not working, but would like to work")
label var unemployed "Unemployed"

* NOTE(review): 3 is the numeric code that encode assigned to one actions_taken category;
* confirm that it is the category meant by the label below.
generate lockdown = (actions_taken == 3)
label var lockdown "Stay at Home Order"

* Reverse the direction of the encoded health scale
replace health = 6 - health

generate ur10 = ue_county/10
label var ur10 "County UR (\(\div 10\))"


* First principal component of the sample_10 sample, interacted with the arm-3 indicator
generate pc_revealed = pc_10*revealed
label var pc_revealed "PC1 \(\times\) Source Revealed"

label var sign_up "Pr. Sign Up"

save data_wave2, replace

*** Erase intermediate datasets

erase ue_county.dta

erase temp.dta
